suppressPackageStartupMessages({
    library(Seurat)
    library(venn)
    library(dplyr)
    library(cowplot)
    library(ggplot2)
    library(pheatmap)
    library(enrichR)
    library(rafalib)
    library(cowplot)
  library(gplots) 
})


plottheme = theme(legend.title = element_text(size = 8),
                  legend.text = element_text(size = 8),
                  axis.title = element_text(size = 8),
                  axis.text = element_text(size = 8),
                  plot.title = element_text(size = 8))


# library(future)
# plan("multiprocess", workers = 4)
# options(future.globals.maxSize = 24000 * 1024^2)

alldata <- readRDS("../analysis/filtered_IMM_DN_int_clus.rds")

# Set desired clustering resolution
sel.lev = 0.25

# Set the identity as louvain with resolution selected above
sel.clust = paste0("CCA_snn_res.",sel.lev)

alldata@active.assay = "RNA"

alldata$orig.ident = factor(alldata$orig.ident, 
                            levels = c("STD_DN_d0","GW_DN_d0",
                                       "STD_IMM_d0","GW_IMM_d0"))
alldata$Treatment = factor(alldata$Treatment, levels = c("STD","GW"))

alldata <- SetIdent(alldata, value = sel.clust)
table(alldata@active.ident)
alldata$ident_clust = paste0(alldata@active.ident,"_",alldata$orig.ident)
## 
##     0     1     2     3     4     5     6     7     8     9    10    11    12 
## 11520  7328  6802  5322  5179  4375  3126  2667  2616  2198  1918  1620  1546 
##    13    14    15    16 
##  1492  1312   567   289

1 Used clustering

plot_grid(ncol = 2, DimPlot(alldata, label = T, shuffle = TRUE) + 
            NoAxes() + NoLegend(),
          DimPlot(alldata, group.by = "orig.ident", shuffle = TRUE) + 
            NoAxes() + theme(legend.position = "bottom") +
  guides(color=guide_legend(ncol=2,override.aes = list(size = 2))))

ggplot(alldata@meta.data, aes_string(fill= sel.clust, x = "Treatment")) + plottheme + 
  geom_bar(position = "fill", color = "black") + facet_wrap(~Type) +
  theme(panel.background = element_blank(),axis.ticks = element_blank(), axis.title.y = element_blank())

frequencies = as.data.frame(table(data.frame(cluster=alldata@meta.data[,sel.clust],
                 sample = alldata$orig.ident, treatment = alldata$Treatment,
                 type = alldata$Type)))
frequencies$Proportion=NA
for(i in 1:nrow(frequencies)){
  frequencies$Proportion[i] = frequencies$Freq[i]/sum(frequencies$Freq[frequencies$sample==frequencies$sample[i]])
}

ggplot(frequencies, aes(fill=treatment, y = Proportion, x = cluster)) + plottheme + 
  geom_bar(stat = "identity",position="dodge") + facet_wrap(~type) +
  theme(panel.background = element_blank(),axis.ticks.y = element_blank())

DotPlot(alldata, features = c("Ptprc","Epcam"), 
    assay = "RNA") + coord_flip() + plottheme

2 Cluster marker genes

Top 25 (by p-value) genes with logFC>0.2 for each cluster, sorted by logFC:

if(file.exists(paste0("../analysis/clustermarkers_IMM_DN_CCA_",sel.lev,".rds"))){
  markers_genes = readRDS(paste0("../analysis/clustermarkers_IMM_DN_CCA_",sel.lev,".rds"))
}else{
  markers_genes <- FindAllMarkers(alldata, logfc.threshold = 0.2, test.use = "wilcox", 
  min.pct = 0.1, min.diff.pct = 0.2, only.pos = TRUE, max.cells.per.ident = 300, 
  assay = "RNA", random.seed = 42)
  saveRDS(markers_genes, paste0("../analysis/clustermarkers_IMM_DN_CCA_",sel.lev,".rds"))
}


top25 <- markers_genes %>% group_by(cluster) %>% top_n(-25, p_val)
top25 <- top25 %>% group_by(cluster) %>% top_n(25, avg_log2FC)


mypar(2, 5, mar = c(4, 6, 3, 1))
for (i in unique(top25$cluster)) {
    barplot(sort(setNames(top25$avg_log2FC, top25$gene)[top25$cluster == i], F), horiz = T, 
        las = 1, main = paste0(i, " vs. rest"), border = "white", yaxs = "i")
    abline(v = c(0, 0.25), lty = c(1, 2))
}

Top 5 (by p-value) genes with logFC>0.2 for each cluster:

top5 <- markers_genes %>% group_by(cluster) %>% top_n(-5, p_val_adj)
top5 <- top5 %>% group_by(cluster) %>% top_n(5, avg_log2FC)

alldata <- ScaleData(alldata, features = as.character(unique(top5$gene)), assay = "RNA")
DoHeatmap(alldata, features = as.character(unique(top5$gene)), group.by = sel.clust, 
    assay = "RNA") + plottheme

#pdf(paste0("../figures/clustermarkers_IMM_DN_",sel.lev,".pdf"), width = 10, height = 10)
#DoHeatmap(alldata, features =  as.character(unique(top5$gene)), assay ="RNA",label = FALSE) + labs(color = "Cluster")
#dev.off()


DotPlot(alldata, features = rev(as.character(unique(top5$gene))), 
    assay = "RNA") + coord_flip() + plottheme

# topAll = markers_genes %>% group_by(cluster)
# DoHeatmap(alldata, features =  as.character(unique(topAll$gene)), assay ="RNA",label = FALSE) + labs(color = "Cluster") 


# DoHeatmap(alldata, features = as.character(unique(top5$gene)), group.by = sel.clust,
#     assay = "RNA", label = FALSE)
# 
# DotPlot(alldata, features = rev(as.character(unique(top5$gene))), group.by = sel.clust, 
#     assay = "RNA") + coord_flip()

Top 3 (by p-value) genes with logFC>0.2 for each cluster:

top3 <- top5 %>% group_by(cluster) %>% top_n(-3, p_val)
top3 <- top3 %>% group_by(cluster) %>% top_n(3, avg_log2FC)

VlnPlot(alldata, features = as.character(unique(top3$gene)), ncol = 4, group.by = sel.clust, 
    assay = "RNA", pt.size = -1)

for(g in as.character(unique(top3$gene))){
  print(FeaturePlot(alldata, features = g, slot = "data") +
    NoAxes() + plottheme + theme(legend.key.width = unit(5, "pt"), 
                                 legend.key.height = unit(5, "pt")))
}

3 Annotations

Based on the top DE genes, here are my predctions for the cell types represented by each cluster:

  • Cluster 0: T cells
  • Cluster 1: Adamdec1+ fibroblasts
  • Cluster 2: Plasma cells
  • Cluster 3: Dcn+ fibroblasts
  • Cluster 4: B cells
  • Cluster 5: Trophocytes
  • Cluster 6: Myeloid cells
  • Cluster 7: ILC3
  • Cluster 8: Bmp5+ fibroblasts
  • Cluster 9: Myocytes
  • Cluster 10: Endothelial
  • Cluster 11: Hhip+ nyocytes
  • Cluster 12: Lef1+ lymphocytes
  • Cluster 13: ILC2
  • Cluster 14: Proliferating
  • Cluster 15: pDCs
  • Cluster 16: LECs

4 Diet comparisons

For each cluster, here are the DE genes between the two treatments:

DGE_list_Treatment = list()

for(i in sort(unique(alldata@active.ident))){
  # select all cells in cluster i
  cell_selection <- subset(alldata, cells = colnames(alldata)[alldata@active.ident == 
      i])
  cell_selection <- SetIdent(cell_selection, value = "Treatment")
  maxcells = 50
  if(min(table(cell_selection@active.ident))<maxcells){
    maxcells = min(table(cell_selection@active.ident))
  }
  # Compute differentiall expression
  DGE_cell_selection <- FindAllMarkers(cell_selection, logfc.threshold = 0.2, test.use = "wilcox", 
      min.pct = 0.1, only.pos = TRUE, max.cells.per.ident = maxcells, 
      assay = "RNA")
  DGE_list_Treatment[[paste("Cluster",i)]] = DGE_cell_selection[DGE_cell_selection$p_val_adj<0.05,]
}

for(i in sort(unique(alldata@active.ident))){
  top20_cell_selection <- DGE_list_Treatment[[paste("Cluster",i)]]
  # print(VlnPlot(subset(alldata, idents = i),unique(top20_cell_selection$gene), 
  #         group.by = "orig.ident", assay = "RNA") + theme(axis.text = element_text(size = 1)))
  if(nrow(top20_cell_selection)>0){
      print(DotPlot(subset(alldata, idents = i), features = rev(as.character(unique(top20_cell_selection$gene))), 
      assay = "RNA", group.by = "Treatment") + coord_flip() + plottheme +
        labs(title = paste("Cluster",i,paste(top3$gene[top3$cluster==i],collapse = ", "))))
  
  }
}